package crawler.taobao;

import java.util.ArrayList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import bean.Grass;

/**
 * Crawler entry point for the Taobao home page.
 *
 * <p>Downloads the home page and collects the links found inside its
 * category menu. Crawling the pages behind those links is not implemented
 * yet.
 */
public class TB {
	/** Page the crawl starts from. */
	private static final String HOME_URL = "http://www.taobao.com";

	/** Id of the element that wraps the category menu on the home page. */
	private static final String CATEGORY_ROOT_ID = "J_CategoryHover";

	/** Connection timeout passed to Jsoup, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 5000;

	/**
	 * Fetches the home page and gathers every category link from it.
	 *
	 * <p>This method is best-effort: any failure (network error, unexpected
	 * markup, ...) is reported on standard error and the method returns
	 * normally, so callers never see an exception.
	 */
	public void begin() {
		try {
			Document doc = Jsoup.connect(HOME_URL)
					.timeout(TIMEOUT_MILLIS).get();

			// Collect all categories together with their links.
			ArrayList<Grass> firstlist = collectCategoryLinks(doc);

			// Visit every second-level page and collect its product links.
			for (Grass grass : firstlist) {
				// TODO: not implemented yet - fetch grass.getHref() and
				// extract the product links from that page.
			}
		} catch (Exception e) {
			// Deliberately broad: keep the crawl best-effort and only report.
			e.printStackTrace();
		}
	}

	/**
	 * Extracts the category links from an already parsed home page.
	 *
	 * <p>Looks up the category menu by id, then walks every descendant whose
	 * {@code class} attribute contains {@code category-item}, every element
	 * below it whose {@code class} attribute is exactly {@code clearfix}, and
	 * finally every {@code <a>} tag inside those.
	 *
	 * @param doc parsed home page
	 * @return one {@link Grass} per anchor, carrying its {@code href} and
	 *         text; empty when the category menu is missing from the page
	 */
	private static ArrayList<Grass> collectCategoryLinks(Document doc) {
		ArrayList<Grass> links = new ArrayList<Grass>();

		// getElementById yields null when the id is absent (markup change,
		// redirect or anti-bot page); bail out instead of hitting an NPE.
		Element root = doc.getElementById(CATEGORY_ROOT_ID);
		if (root == null) {
			System.err.println("TB: element with id '" + CATEGORY_ROOT_ID
					+ "' not found on " + HOME_URL
					+ "; no categories collected");
			return links;
		}

		Elements items = root.getElementsByAttributeValueContaining(
				"class", "category-item");
		for (Element item : items) {
			Elements groups = item.getElementsByAttributeValue("class",
					"clearfix");
			for (Element group : groups) {
				for (Element anchor : group.getElementsByTag("a")) {
					Grass grass = new Grass();
					grass.setHref(anchor.attr("href"));
					grass.setText(anchor.text());
					links.add(grass);
				}
			}
		}
		return links;
	}
}
